library(httr)
library(data.table)
library(dplyr)
library(lubridate)
library(knitr)
library(highcharter)
library(DT)
library(caret)
library(tibble)
library(rsample)
library(jtools)
fread("https://data.geo.admin.ch/ch.meteoschweiz.klima/nbcn-tageswerte/nbcn-daily_BAS_previous.csv", sep = ";", colClasses = c("character", "Date", rep("numeric", 10))) %>%
mutate(
timestamp = as.POSIXct(
paste0(substr(date, 1, 4), "-", substr(date, 5, 6), "-", substr(date, 7, 8), " 00:00:00"),
format="%Y-%m-%d %H:%M:%S"
)
) %>%
mutate_at(c("gre000d0", "hto000d0", "nto000d0", "prestad0", "rre150d0", "sre000d0", "tre200d0", "tre200dn", "tre200dx", "ure200d0"), as.numeric) %>%
bind_rows(
fread("https://data.geo.admin.ch/ch.meteoschweiz.klima/nbcn-tageswerte/nbcn-daily_BAS_current.csv", sep = ";", colClasses = c("character", "Date", rep("numeric", 10))) %>%
mutate(
timestamp = as.POSIXct(
paste0( substr(date, 1, 4), "-", substr(date, 5, 6), "-", substr(date, 7, 8), " 00:00:00"),
format="%Y-%m-%d %H:%M:%S", tz="Europe/Berlin"
)
) %>%
mutate_at(c("gre000d0", "hto000d0", "nto000d0", "prestad0", "rre150d0", "sre000d0", "tre200d0", "tre200dn", "tre200dx", "ure200d0"), as.numeric)
) %>%
relocate(timestamp) %>%
select(-c(date, `station/location`)) %>%
mutate(
year = as.numeric(substr(timestamp, 1, 4)),
month = as.numeric(substr(timestamp, 6, 7)),
day = as.numeric(substr(timestamp, 9, 10))
) %>%
data.frame() %>%
assign("meteo", ., inherits = TRUE)
httr::GET("https://data.bs.ch/explore/dataset/100074/download/?format=csv&timezone=Europe%2FBerlin") %>%
content(., "text") %>%
fread(sep=";") %>%
select(tag_datum, name, code, kategorie_name) %>%
filter(name != "Fasnachtsmontag", name != "Fasnachtsmittwoch", name != "Dies Academicus") %>%
filter(!(tag_datum == "2008-05-01 00:00:00" & name == "Tag der Arbeit")) %>% # Tag der Arbeit doubles with Auffahrt
filter(kategorie_name %in% c("Feiertag", "Ferien") | code == "herbstm") %>%
filter(name != "Semesterferien") %>%
assign("rd_veranst", ., inherits = TRUE)
rd_veranst %>%
data.frame() %>%
mutate(Herbstmesse = if_else(code == "herbstm", "Herbstmesse", "")) %>%
select(tag_datum, Herbstmesse) %>%
filter(Herbstmesse != "") %>%
full_join(
rd_veranst %>%
mutate(Feiertage = if_else(kategorie_name == "Feiertag", name, "")) %>%
select(tag_datum, Feiertage) %>%
filter(Feiertage != ""),
by = "tag_datum"
) %>%
full_join(
rd_veranst %>%
mutate(Ferien = if_else(kategorie_name == "Ferien", name, "")) %>%
select(tag_datum, Ferien) %>%
filter(Ferien != ""),
by = "tag_datum"
) %>%
mutate(timestamp = as.POSIXct(tag_datum, tz="Europe/Berlin")) %>%
filter(year(timestamp) > 2011) %>%
select(timestamp, Herbstmesse, Feiertage, Ferien) %>%
mutate(
year = lubridate::year(timestamp),
month = lubridate::month(lubridate::floor_date(timestamp, "month")),
day = lubridate::day(lubridate::floor_date(timestamp, "day"))
) %>%
data.frame() %>%
assign("Veranstaltungen", ., inherits = TRUE)
rm(events, rd_veranst)
httr::GET("https://data.bs.ch/explore/dataset/100304/download/?format=csv&timezone=Europe%2FBerlin") %>%
content(., "text") %>%
fread(sep=";") %>%
group_by(year, month, day) %>%
filter(year > 2011) %>%
summarise(gasverbrauch = sum(value, na.rm = T)) %>%
ungroup() %>%
mutate(timestamp = as.POSIXct(
paste0(year, "-", month, "-", day, " 00:00:00"), format="%Y-%m-%d %H:%M:%S", tz="Europe/Berlin"
)
) %>%
mutate(gasverbrauch=gasverbrauch/1000000) %>%
relocate(timestamp) %>%
assign("gas_daily", ., inherits = TRUE)
meteo %>%
full_join(Veranstaltungen %>%
select(-timestamp), by = c("year" = "year", "month" = "month", "day" = "day")) %>%
full_join(gas_daily %>%
select(-timestamp), by = c("year" = "year", "month" = "month", "day" = "day")) %>%
mutate(weekday = lubridate::wday(as.Date(timestamp), label = TRUE, abbr = TRUE),
daytype = if_else(weekday %in% c(1,7), "Wochenende", "Werktage")) %>%
relocate(timestamp, gasverbrauch) %>%
filter(!is.na(gasverbrauch)) %>%
mutate(HGT = if_else(tre200d0 <= 12, 20-tre200d0, 0)) %>%
mutate(Herbstmesse = if_else(is.na(Herbstmesse), "No", Herbstmesse),
Feiertage = if_else(is.na(Feiertage), "No", Feiertage),
Ferien = if_else(is.na(Ferien), "No", Ferien)) %>%
mutate(time = as.Date(timestamp, tz = 'Europe/Berlin')) %>%
select(-timestamp) %>%
relocate(time) %>%
slice(1:(n() - 1)) %>%
assign("Data", ., inherits = TRUE)
# Variable für Energiespar-Kampagnen
start_date= as.Date("2022-09-01")
end_date= as.Date("2023-01-31")
Data %>%
mutate(time = as.Date(time),
Feiertage_dummy = if_else(Feiertage == "No", 0, 1),
Ferien_dummy = if_else(Ferien == "No", 0, 1),
Herbstmesse_dummy = if_else(Herbstmesse == "No", 0, 1),
Wochenende_dummy = if_else(daytype == "Werktage", 0, 1),
Energiespar_Kampagnen = ifelse(as.Date(time) %within% interval(start_date, end_date), 1,0),
year_month = paste(year,
formatC(month, width = 2, format = "d", flag = "0"),
sep="-"),
Energiespar_Kampagnen_monatlich = ifelse(Energiespar_Kampagnen == 1, as.character(year_month), "0"),
weekday = factor(weekday, ordered = FALSE)
) %>%
slice(1:(n() - 1)) %>% # Unvollständigen Tag für den Gasverbrauch entfernen.
assign("Data_selec", ., inherits = TRUE)
Data_selec_model <- Data_selec %>%
filter(time < as.Date("2024-10-31"))
set.seed(12345)
inTraining <- createDataPartition(Data_selec_model$gasverbrauch, p = .7, list = FALSE)
training <- Data_selec_model[ inTraining,]
testing <- Data_selec_model[-inTraining,]
fitControl <- trainControl(method = "repeatedcv",
number = 10,
repeats = 10)
set.seed(54321)
ols_Fit1 <- train(gasverbrauch ~ time + as.factor(month) + weekday +
as.factor(Ferien_dummy) + as.factor(Feiertage_dummy) + as.factor(Herbstmesse_dummy) +
gre000d0 + hto000d0 + nto000d0 + prestad0 + rre150d0 +
sre000d0 +
tre200d0 + tre200dn + tre200dx + ure200d0 +
Energiespar_Kampagnen +
HGT +
I(HGT^2) + I(tre200d0^2),
data = training,
method = "lm",
trControl = fitControl,
verbose = TRUE)
# ols_Fit1
# ols_Fit1$resample
# summary(ols_Fit1)
ols_Fit1$results %>%
round(digits = 3) -> ols_Fit1$results
data.frame(
RMSE = paste0(ols_Fit1$results[2], " ± ", ols_Fit1$results[5]),
Rsquared = paste0(ols_Fit1$results[3], " ± ", ols_Fit1$results[6]),
MAE = paste0(ols_Fit1$results[4], " ± ", ols_Fit1$results[7])
) %>%
kable()
| RMSE | Rsquared | MAE |
|---|---|---|
| 0.912 ± 0.072 | 0.966 ± 0.006 | 0.701 ± 0.056 |
testing %>%
mutate(pred = predict(ols_Fit1, newdata = testing)) %>%
select(obs = gasverbrauch, pred) %>%
defaultSummary() %>%
round(digits = 3) -> performance_test
data.frame(
RMSE = performance_test[1],
Rsquared = performance_test[2],
MAE = performance_test[3]
) %>%
kable(row.names = F, align = "lll")
| RMSE | Rsquared | MAE |
|---|---|---|
| 0.974 | 0.959 | 0.741 |
final_model <- lm(gasverbrauch ~ time + as.factor(month) + weekday +
as.factor(Ferien_dummy) + as.factor(Feiertage_dummy) + as.factor(Herbstmesse_dummy) +
gre000d0 + hto000d0 +
# nto000d0 + prestad0 + rre150d0 + sre000d0 +
tre200d0 + tre200dn +
# tre200dx + ure200d0 +
Energiespar_Kampagnen +
# HGT + I(HGT^2) +
I(tre200d0^2),
data = Data_selec_model)
summ(final_model,
model.info = T,
model.fit = F,
digits = getOption("jtools-digits", 3),
stars = T,
robust=T
)
## MODEL INFO:
## Observations: 1156
## Dependent Variable: gasverbrauch
## Type: OLS linear regression
##
## Standard errors: Robust, type = HC3
## ----------------------------------------------------------------------
## Est. S.E. t val. p
## ----------------------------------- -------- ------- --------- -------
## (Intercept) 51.680 1.801 28.701 0.000
## time -0.002 0.000 -18.924 0.000
## as.factor(month)2 -0.620 0.176 -3.524 0.000
## as.factor(month)3 -1.292 0.173 -7.452 0.000
## as.factor(month)4 -2.776 0.197 -14.055 0.000
## as.factor(month)5 -3.564 0.198 -18.033 0.000
## as.factor(month)6 -3.401 0.213 -15.999 0.000
## as.factor(month)7 -3.073 0.218 -14.102 0.000
## as.factor(month)8 -3.529 0.207 -17.061 0.000
## as.factor(month)9 -3.407 0.190 -17.897 0.000
## as.factor(month)10 -2.639 0.165 -15.989 0.000
## as.factor(month)11 -1.581 0.188 -8.414 0.000
## as.factor(month)12 0.028 0.194 0.146 0.884
## weekdayMo 0.009 0.105 0.089 0.929
## weekdayDi -0.022 0.104 -0.209 0.835
## weekdayMi -0.051 0.098 -0.524 0.601
## weekdayDo -0.150 0.097 -1.545 0.123
## weekdayFr -0.650 0.103 -6.303 0.000
## weekdaySa -0.685 0.100 -6.869 0.000
## as.factor(Ferien_dummy)1 -0.430 0.080 -5.362 0.000
## as.factor(Feiertage_dummy)1 -0.474 0.156 -3.035 0.002
## as.factor(Herbstmesse_dummy)1 -0.720 0.153 -4.720 0.000
## gre000d0 -0.002 0.001 -3.611 0.000
## hto000d0 -0.284 0.076 -3.760 0.000
## tre200d0 -0.872 0.030 -28.645 0.000
## tre200dn -0.067 0.022 -3.093 0.002
## Energiespar_Kampagnen -1.709 0.109 -15.688 0.000
## I(tre200d0^2) 0.019 0.001 24.995 0.000
## ----------------------------------------------------------------------
| RMSE | Rsquared | MAE |
|---|---|---|
| 0.902 | 0.966 | 0.695 |
Data_selec %>%
bind_cols(
predict(final_model, Data_selec, interval = "prediction")
) %>%
slice(1:(n() - 1)) -> Data_model_ols
highchart(type = "stock") %>%
hc_add_series(Data_model_ols, "line", hcaes(time, gasverbrauch), color = "#008AC3",
tooltip = list(pointFormat = "Effektiver Gasverbrauch: {point.gasverbrauch:.2f} GWh",
shared = TRUE),
zIndex = 1) %>%
hc_xAxis(title = list(text = "")) %>%
hc_add_series(Data_model_ols, "line", hcaes(time, fit), color = "#B375AB",
tooltip = list(pointFormat = "Erwarteter Gasverbrauch: {point.fit:.2f} GWh",
shared = TRUE),
zIndex = 2) %>%
hc_add_series(Data_model_ols, type = "arearange",
hcaes(x = time, low = lwr, high = upr),
zIndex = 0,
color = "#E7CEE2",
tooltip = list(pointFormat = "95% Konfidenzintervall: {point.lwr:.2f} - {point.upr:.2f} GWh"), shared = TRUE
) %>%
hc_yAxis(floor=0, title = list(text = ""), opposite = FALSE) %>%
hc_plotOptions(series = list(marker = list(enabled = FALSE))) %>%
hc_rangeSelector(selected = 0)
write.csv2(Data_model_ols %>%
mutate(vgl_real_minus_forecast = gasverbrauch - fit) %>%
select(
time,
gasverbrauch,
forecast = fit,
vgl_real_minus_forecast,
forecast_lowFI = lwr,
forecast_highFI = upr
),
"100353_gasverbrauch.csv", row.names=F, na = "")
Hinweis: Je nach Kombination von Betriebssystemen und Versionen von RStudio, R und den verwendeten Pakete können die Ergebnisse leicht von den publizierten Resultaten abweichen. Die angewendete Konfiguration lautet:
sessionInfo()
## R version 4.3.3 (2024-02-29 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 11 x64 (build 22631)
##
## Matrix products: default
##
##
## locale:
## [1] LC_COLLATE=German_Switzerland.utf8 LC_CTYPE=German_Switzerland.utf8
## [3] LC_MONETARY=German_Switzerland.utf8 LC_NUMERIC=C
## [5] LC_TIME=German_Switzerland.utf8
##
## time zone: Europe/Zurich
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] jtools_2.3.0 rsample_1.2.1 tibble_3.2.1 caret_7.0-1
## [5] lattice_0.22-5 ggplot2_3.5.1 DT_0.33 highcharter_0.9.4
## [9] knitr_1.50 lubridate_1.9.4 dplyr_1.1.4 data.table_1.17.0
## [13] httr_1.4.7
##
## loaded via a namespace (and not attached):
## [1] tidyselect_1.2.1 timeDate_4041.110 fastmap_1.2.0
## [4] pROC_1.18.5 digest_0.6.37 rpart_4.1.23
## [7] timechange_0.3.0 lifecycle_1.0.4 survival_3.5-8
## [10] magrittr_2.0.3 compiler_4.3.3 rlang_1.1.5
## [13] sass_0.4.9 tools_4.3.3 igraph_2.1.4
## [16] yaml_2.3.10 htmlwidgets_1.6.4 curl_6.2.0
## [19] plyr_1.8.9 TTR_0.24.4 withr_3.0.2
## [22] purrr_1.0.2 stats4_4.3.3 nnet_7.3-19
## [25] grid_4.3.3 xts_0.14.1 colorspace_2.1-1
## [28] future_1.34.0 globals_0.16.3 scales_1.3.0
## [31] iterators_1.0.14 MASS_7.3-60.0.1 cli_3.6.3
## [34] rmarkdown_2.29 generics_0.1.3 rlist_0.4.6.2
## [37] rstudioapi_0.17.1 future.apply_1.11.3 reshape2_1.4.4
## [40] cachem_1.1.0 pander_0.6.6 stringr_1.5.1
## [43] splines_4.3.3 assertthat_0.2.1 parallel_4.3.3
## [46] vctrs_0.6.5 sandwich_3.1-1 hardhat_1.4.1
## [49] Matrix_1.6-5 jsonlite_1.8.9 bookdown_0.42
## [52] listenv_0.9.1 foreach_1.5.2 gower_1.0.2
## [55] tidyr_1.3.1 jquerylib_0.1.4 recipes_1.1.1
## [58] quantmod_0.4.26 glue_1.8.0 parallelly_1.42.0
## [61] codetools_0.2-19 stringi_1.8.4 gtable_0.3.6
## [64] broom.mixed_0.2.9.6 munsell_0.5.1 furrr_0.3.1
## [67] pillar_1.10.1 htmltools_0.5.8.1 ipred_0.9-15
## [70] lava_1.8.1 R6_2.5.1 evaluate_1.0.3
## [73] backports_1.5.0 broom_1.0.7 bslib_0.9.0
## [76] class_7.3-22 Rcpp_1.0.14 nlme_3.1-164
## [79] prodlim_2024.06.25 xfun_0.51 forcats_1.0.0
## [82] zoo_1.8-13 pkgconfig_2.0.3 ModelMetrics_1.2.2.2
Gasverbrauch im Versorgungsgebiet der IWB: https://data.bs.ch/explore/dataset/100304/
Effektiver und erwarteter täglicher Gasverbrauch: https://data.bs.ch/explore/dataset/100353/
Der Code des Modells kann selber ausgeführt und weiterentwickelt werden. Hierfür wird Renku verwendet. Renku ist eine Plattform, die verschiedene Werkzeuge für reproduzierbare und kollaborative Datenanalyseprojekte bündelt: https://renkulab.io/projects/stata/reproducible/erwarteter-gasverbrauch-basel-stadt
Webartikel: https://www.statistik.bs.ch/aktuell/gasverbrauch-2023.html